* Session setup: start from an empty session, raise the variable limit (the script
* builds several sets of 481 class-level variables), and close any log left open.
clear all
set maxvar 30000
version 14
capture log close
set more off

****************************************************************************************************
* -----   Customize the paths and options:   ----- 
****************************************************************************************************
*cd  "Data\"
global MY_IN_PATH   "C:\Users\benjamin.balsmeier\Dropbox\Research\REStat\data"
global MY_OUT_PATH  "C:\Users\benjamin.balsmeier\Dropbox\Research\REStat\data"
*global MY_TEMP_PATH "..."

* The path globals carry no trailing separator, so one is inserted here. Without it
* the file names are glued onto the last folder name (e.g. ...\REStat\datacr_tp.log)
* and the files are written next to the data folder instead of inside it.
global MY_OUT_FILE  "${MY_OUT_PATH}/out.dta"
global MY_LOG_FILE  "${MY_OUT_PATH}/cr_tp.log"

log using "${MY_LOG_FILE}", text replace
****************************************************************************************************
* import data
*************************************************************************************************

* Load the patent-level input file from the input folder.
use ${MY_IN_PATH}/patents.dta, clear

* create matrix for uspc level

* Class indicators: c# equals 1 on rows whose uspcid is #, and 0 on all other rows.
forvalues cls = 1/481 {
	generate c`cls' = (uspcid == `cls')
	display `cls'
}

* Totals per gvkey-ayear cell: h# is the sum of the class indicator c# within the cell.
forvalues cls = 1/481 {
	display `cls'
	gegen h`cls' = sum(c`cls'), by(gvkey ayear)
}

* The row-level indicators are not needed once the cell totals exist.
drop c1-c481

keep patent uspcid ayear appl_dt gvkey h1-h481 m_kpss

* Keep a single row per gvkey-ayear cell (the first one after sorting).
bysort gvkey ayear: keep if _n == 1

* From here on the cell totals go by the names c1 ... c481.
forvalues cls = 1/481 {
	rename h`cls' c`cls'
}

* gen pat_stock_per class and gvkey
* c#_cum5 = sum of c# over the same gvkey's rows dated 1 to 5 years before ayear.
*
* The data hold one row per gvkey-ayear, so within a gvkey the row k positions back
* is at least k years older. Every row inside the five-year window is therefore
* among the five preceding rows, and it is enough to test each of them on its
* actual year distance. Requiring the row k positions back to be exactly k years
* older (the previous logic) dropped in-window rows whenever a gvkey had a gap in
* its years, e.g. rows 2000, 2002, 2003: the 2003 stock missed year 2000.
* NOTE(review): assumes ayear is integer-valued - confirm.
forvalues cls = 1/481 {
	generate c`cls'_cum5 = 0
	display `cls'
}
sort gvkey ayear
forvalues cls = 1/481 {
	forvalues back = 1/5 {
		bysort gvkey (ayear): replace c`cls'_cum5 = c`cls'_cum5 + c`cls'[_n-`back'] if inrange(ayear - ayear[_n-`back'], 1, 5)
	}
	display `cls'
}

** test
* Sanity check on the five-year stocks: each must be non-missing and non-negative.
* The earlier check subtracted c#_cum from c#_cum5, but no variable c#_cum exists;
* the name only resolved through variable abbreviation to c#_cum5 itself, so the
* difference was identically zero (and the line fails under set varabbrev off).
forvalues cls = 1/481 {
	display `cls'
	assert c`cls'_cum5 >= 0 & !missing(c`cls'_cum5)
}

*** gen dummy for entering new techclass
* new# is 1 when the cell has patents in class # (c# > 0) while the five-year
* stock in that class is zero, and 0 otherwise.
forvalues cls = 1/481 {
	generate new`cls' = (c`cls' > 0 & c`cls'_cum5 == 0)
}

* Gen pats in new classes
* new#_sum holds c# under the same condition and 0 otherwise.
* This stays a separate loop so that new1 ... new481 remain adjacent in variable
* order; the new1-new481 range used further down depends on that.
forvalues cls = 1/481 {
	generate new`cls'_sum = cond(c`cls' > 0 & c`cls'_cum5 == 0, c`cls', 0)
}


* no_new_cl5: number of classes flagged as newly entered in the gvkey-year
* (row sum of new1 ... new481).
* no_patnew_cl5: number of patents in those newly entered classes
* (row sum of new1_sum ... new481_sum).
* A row total does not depend on other rows, so one pass replaces the former loop
* over every year from 1913 to 2014. The year restriction is kept so that rows
* outside that range are still left missing.
* NOTE(review): the former loop matched whole years only; this assumes ayear is
* integer-valued - confirm.
egen no_new_cl5 = rowtotal(new1-new481) if inrange(ayear, 1913, 2014)

egen no_patnew_cl5 = rowtotal(new1_sum-new481_sum) if inrange(ayear, 1913, 2014)

sum no_new_cl5 no_patnew_cl5
* Intermediate snapshot that still contains the per-class new-entry variables.
save ${MY_IN_PATH}/techprox_temp4.dta, replace

drop new1-new481 new1_sum-new481_sum


**********************************************************************************
******Technological Proximity
*Jaffe's technological proximity measure
*T_i_j=(Fi*Fj')/( ((Fi*Fi')^0.5)*((Fj*Fj')^0.5) )
* ==> T_i_it-1=(Fi*Fit-1')/( ((Fi*Fi')^0.5)*((Fit-1*Fit-1')^0.5) )

*1. gen patent stock in t-1 to t-5
* Row sum of the class-level five-year stocks c1_cum5 ... c481_cum5.
gegen pat_stock5 = rowtotal(c1_cum5-c481_cum5) 

*2. fraction of stock patents in each class
* The division yields missing when pat_stock5 is 0; those shares are set to 0.
forvalues cls = 1/481 {
	generate pat_stock_fr_`cls' = c`cls'_cum5/pat_stock5
	replace pat_stock_fr_`cls' = 0 if missing(pat_stock_fr_`cls')
}

*3. Count number of patents per year 
* Row sum of the class counts c1 ... c481.
egen npat = rowtotal(c1-c481) 
summarize npat

*4. gen fraction of patents in each class in t
forvalues cls = 1/481 {
	generate pat_fr_`cls' = c`cls'/npat
}

*** Calculate techprox
* Running sums over the 481 classes:
*   fi_fj - products of the current-year share and the stock share
*   fi_sq - squared current-year shares
*   fj_sq - squared stock shares
generate fi_fj = 0
generate fi_sq = 0
generate fj_sq = 0
forvalues cls = 1/481 {
	display `cls'
	replace fi_fj = fi_fj + pat_fr_`cls' * pat_stock_fr_`cls'
	replace fi_sq = fi_sq + pat_fr_`cls'^2
	replace fj_sq = fj_sq + pat_stock_fr_`cls'^2
}

* Cross product divided by the square root of the product of the two sums of
* squares; tp_raw5 is a copy of that ratio.
generate T_i_j = fi_fj / sqrt(fi_sq*fj_sq)
generate tp_raw5 = T_i_j

* Final summary, renaming and export of the gvkey-year measures.
* The variables tp5 and tp5_2020 were listed here before, but nothing in this
* script creates them, so summarize and keep stopped with a variable-not-found
* error. They are left out; tp_raw5 is the proximity measure that is computed.
* NOTE(review): if a transformed tp5 was intended, define it before this point.
sum npat no_new_cl5 no_patnew_cl5 tp_raw5 pat_stock5

ren ayear year
ren npat npat2020
ren no_patnew_cl5 npat_new5_2020
ren tp_raw5 tp_raw5_2020 
* Patents outside newly entered classes, and the share of patents in new classes.
gen npat_old5_2020 = npat2020 - npat_new5_2020
gen fr_npat_new5_2020 = npat_new5_2020/npat2020 
ren no_new_cl5 no_new_cl5_2020

sum npat2020 npat_new5_2020 npat_old5_2020 fr_npat_new5_2020 tp_raw5_2020 no_new_cl5_2020

destring gvkey, replace
keep gvkey year npat2020 npat_new5_2020 npat_old5_2020 fr_npat_new5_2020 no_new_cl5_2020 tp_raw5_2020 
save ${MY_IN_PATH}/techprox_newptas20200826_pdate.dta, replace

* Close the log opened at the top of the script.
log close
